---
title: "Replication of the Data Construction for 'Threats and Commitments: International Tribunals and Domestic Trials in Peace Negotiations'"
author: "Genevieve Bates"
date: "09/2025"
output: html_document
---

```{r setup, include=FALSE}
# Start from an empty workspace so that objects left over from an earlier
# session cannot leak into the build.
rm(list = ls(all = TRUE))

# Chunk defaults: hide code and warnings, and pass chunk output through as-is.
knitr::opts_chunk$set(echo = FALSE,
                      warning = FALSE,
                      results = 'asis')

# All relative paths below ("Data/Raw/...", "Data/Analysis/...") are meant to
# be resolved against the replication folder. knitr restores the working
# directory when a chunk finishes, so a setwd() call in this chunk would be
# undone before any data chunk runs; root.dir is the setting that carries over
# to the chunks that follow.
knitr::opts_knit$set(root.dir = path.expand("~/Replication"))

library(dplyr)
library(tidyverse)
library(readr)
library(readxl)
library(haven)
library(countrycode)
library(zoo)
library(date)
library(WDI)
```

```{r VDEM DATA, echo=FALSE}
# Load the full V-Dem country-year file.
vdem_raw <- read_rds("Data/Raw/V-Dem-CY-Full+Others-v13.rds")

# Keep the identifiers and the two V-Dem indices used later; the identifiers
# are renamed to the names shared by the other sources (country, ccode).
vdem <- select(vdem_raw,
               country = country_name,
               ccode = COWcode,
               year,
               v2x_partipdem,
               v2xcl_acjst)

vdem <- vdem %>%
  # Store codes, years and indices as plain numerics.
  mutate(across(c(ccode, year, v2x_partipdem, v2xcl_acjst), as.numeric)) %>%
  # Harmonise country spellings; the replacements are applied one after the
  # other, in this order, because later merges are on country name.
  mutate(country = country %>%
           gsub("Democratic Republic of the Congo", "Democratic Republic of Congo", .) %>%
           gsub("Ivory Coast", "Cote d'Ivoire", .) %>%
           gsub("Republic of the Congo", "Republic of Congo", .) %>%
           gsub("Burma/Myanmar", "Myanmar", .) %>%
           gsub("Palestine/West Bank", "Palestine", .) %>%
           gsub("The Gambia", "Gambia", .) %>%
           gsub("Yemen, Rep.", "Yemen", .))
```

```{r WORLD BANK DATA, echo=FALSE}
# Download the two World Bank series: NY.GDP.PCAP.KD (GDP per capita, constant
# US$) and DT.ODA.ODAT.GN.ZS (net ODA received as a share of GNI).
gdppc <- WDI(indicator = "NY.GDP.PCAP.KD")
odagni <- WDI(indicator = "DT.ODA.ODAT.GN.ZS")

worldbank <- gdppc %>%
  # Join on explicit keys. A natural join would also key on whatever
  # bookkeeping columns a given WDI version happens to return, and would
  # silently drop ODA values if any of those differed between the two
  # downloads. Only country, year and the two indicators are kept below.
  left_join(odagni, by = c("country", "year")) %>%
  rename(gdppc = NY.GDP.PCAP.KD,
         odagni = DT.ODA.ODAT.GN.ZS) %>%
  mutate(year = as.numeric(year),
         gdppc = as.numeric(gdppc),
         odagni = as.numeric(odagni)) %>%
  select(country, year, gdppc, odagni) %>%
  # Fixing some of the names, because we will merge on country (name) and year
  # later. Order matters: the more specific "Dem." patterns run before the
  # shorter "Rep." patterns.
  mutate(country = gsub("Congo, Dem. Rep.", "Democratic Republic of Congo", country),
         country = gsub("Congo, Rep.", "Republic of Congo", country),
         country = gsub("Syrian Arab Republic", "Syria", country),
         country = gsub("West Bank and Gaza", "Palestine", country),
         country = gsub("Gambia, The", "Gambia", country),
         country = gsub("Brunei Darussalam", "Brunei", country),
         country = gsub("Cabo Verde", "Cape Verde", country),
         country = gsub("Egypt, Arab Rep.", "Egypt", country),
         country = gsub("Iran, Islamic Rep.", "Iran", country),
         # Accept either a typographic or a plain apostrophe, so the rename
         # does not depend on how the download encodes the name.
         country = gsub("Korea, Dem. People[’']s Rep.", "North Korea", country),
         country = gsub("Korea, Rep.", "South Korea", country),
         country = gsub("Kyrgyz Republic", "Kyrgyzstan", country),
         country = gsub("Lao PDR", "Laos", country),
         country = gsub("Micronesia, Fed. Sts.", "Micronesia (Federated States of)", country),
         country = gsub("Russian Federation", "Russia", country),
         country = gsub("Venezuela, RB", "Venezuela", country),
         country = gsub("Bahamas, The", "Bahamas", country),
         country = gsub("Yemen, Rep.", "Yemen", country))

# The per-indicator downloads are no longer needed.
remove(gdppc, odagni)
```

```{r HUMAN RIGHTS DATA, echo=FALSE}
# Human rights organization data (the file path points at the Murdie & Davis
# 2010 ISQ replication file).
HRO_raw <- read_dta("/Users/genbates/Library/CloudStorage/Dropbox/DatasetsGB//MurdieDavis2010/ISQ2010MurdieDavis_finaltoISQ.dta")

hro <- HRO_raw %>%
  # Keep the identifiers and the two HRO measures, using the shared key names.
  dplyr::select(country = NAMES_STD,
                year,
                ccode = cowcode,
                HROsecretariatlocation,
                hrfilled) %>%
  # Plain numeric / character columns.
  mutate(across(c(year, ccode, HROsecretariatlocation, hrfilled), as.numeric),
         country = as.character(country)) %>%
  # The two Congos have to be told apart by COW code here, because the later
  # merges are on country name.
  mutate(country = ifelse(ccode == 484 & country == "Congo",
                          "Republic of Congo",
                          country),
         country = ifelse(ccode == 490 & country == "Democratic Republic of the Congo",
                          "Democratic Republic of Congo",
                          country))


rm(HRO_raw)
```

```{r ICC DATA, echo=FALSE}
# Read the ICC involvement file and keep the country-year identifiers, region
# and the ICC measures that are used downstream.
iccdata_raw <- read_csv("Data/Raw/ICCInvolvementData.csv")

iccdata <- select(iccdata_raw,
                  country, year, region,
                  ICC_ratification, threati, intervention,
                  unscref, art14ref, propmo,
                  stage, extent, threatm)

rm(iccdata_raw)
```


```{r CONFLICT DATA, echo=FALSE} 
# UCDP GED event data.
GED_raw <- read_csv("Data/Raw/ged211.csv")

GED <- GED_raw %>%
  # Keep the event fields needed below; the two sides are carried forward
  # under the names actora / actorb.
  dplyr::select(id, country, conflict_new_id, year, active_year,
                date_start, date_end, type_of_violence,
                deaths_a, deaths_b, deaths_civilians, deaths_unknown, best,
                actora = side_a,
                actorb = side_b) %>%
  mutate(
    # 1 when side A is a government and side B is civilians, i.e. the
    # government committed atrocities (killing civilians)
    govat = ifelse(str_starts(actorb, "Civilians") & str_starts(actora, "Government"), 1, 0),
    # 1 when side B is civilians and the event is not a government atrocity,
    # i.e. an actor other than the government killed civilians
    ngat = ifelse(str_starts(actorb, "Civilians") & govat == 0, 1, 0)
  )


# UCDP/PRIO Armed Conflict Dataset (conflict-year).
ACD_raw <- read_csv("/Users/genbates/Library/CloudStorage/Dropbox/DatasetsGB/UCDP_Conflict/Country/21.1/ucdp-prio-acd-211.csv")

ACD <- ACD_raw %>%
  # Duplicate the conflict id under the name GED uses, so the two can be joined.
  mutate(conflict_new_id = conflict_id) %>%
  dplyr::select(conflict_id, year, side_a, side_b, incompatibility,
                intensity_level, cumulative_intensity, type_of_conflict,
                conflict_new_id)

rm(GED_raw, ACD_raw)


# Here the data are merged and all extra-systemic and purely international
# armed conflict is filtered out. Internationalized intra-state conflict is
# kept, even though those peace agreements are not part of the analysis,
# because conflict-level factors like deaths and atrocities will likely matter
# even if the dynamics of formal peace negotiations in those situations differ.
mydata <- GED %>%
  # Attach the conflict-year attributes from the Armed Conflict Dataset.
  left_join(ACD) %>%
  # conflict_id is dropped so that a clean version is available for later merges.
  dplyr::select(-conflict_id) %>%
  # (A merge with the one-sided-violence data, left_join(OSV), is disabled.)
  mutate(
    # Flag conflict types 1 and 2 as international. Events without a matching
    # ACD row have a missing type_of_conflict; a bare comparison would return
    # NA for them, so they are explicitly coded 0 and therefore kept.
    international = ifelse(is.na(type_of_conflict),
                           0,
                           ifelse(type_of_conflict == 1 | type_of_conflict == 2, 1, 0)),
    # Active-year events of violence type 1 or 2.
    active = ifelse(active_year == 1 & (type_of_violence == 1 | type_of_violence == 2), 1, 0)
  ) %>%
  filter(international == 0) %>%
  # Running count of active events within each country-year (the result stays
  # grouped by country and year).
  group_by(country, year) %>%
  mutate(activecum = cumsum(active))

# Each table below follows the same recipe: take a running sum within
# country-year, then keep only the last row of each country-year, which
# carries the total for that year.

# CIVILIAN DEATHS -- civilian deaths across all events in a country-year
civdeaths <- mydata %>%
  dplyr::select(country, year, deaths_civilians) %>%
  group_by(country, year) %>%
  mutate(civdeaths_cumulative = cumsum(deaths_civilians)) %>%
  # the last row of the group holds the full-year sum
  slice_tail(n = 1) %>%
  ungroup() %>%
  dplyr::select(country, year, civdeaths_cumulative)

# GOVERNMENT ATROCITIES -- civilian deaths in events flagged govat == 1
govat <- mydata %>%
  filter(govat == 1) %>%
  dplyr::select(country, year, deaths_civilians) %>%
  group_by(country, year) %>%
  mutate(cumsumgat = cumsum(deaths_civilians)) %>%
  # the last row of the group holds the full-year sum
  slice_tail(n = 1) %>%
  ungroup() %>%
  dplyr::select(country, year, cumsumgat)

# REBEL ATROCITIES -- civilian deaths in events flagged ngat == 1
rebat <- mydata %>%
  filter(ngat == 1) %>%
  dplyr::select(country, year, deaths_civilians) %>%
  group_by(country, year) %>%
  mutate(cumsumrebat = cumsum(deaths_civilians)) %>%
  # the last row of the group holds the full-year sum
  slice_tail(n = 1) %>%
  ungroup() %>%
  dplyr::select(country, year, cumsumrebat)

# GOVERNMENT CASUALTIES -- side-A deaths in events of violence type 1
govcas <- mydata %>%
  filter(type_of_violence == 1) %>%
  dplyr::select(country, year, deaths_a) %>%
  group_by(country, year) %>%
  mutate(govcasualties = cumsum(deaths_a)) %>%
  # the last row of the group holds the full-year sum
  slice_tail(n = 1) %>%
  ungroup() %>%
  dplyr::select(country, year, govcasualties)

# REBEL CASUALTIES -- side-B deaths in events of violence type 1
rebcas <- mydata %>%
  filter(type_of_violence == 1) %>%
  dplyr::select(country, year, deaths_b) %>%
  group_by(country, year) %>%
  mutate(rebelcasualties = cumsum(deaths_b)) %>%
  # the last row of the group holds the full-year sum
  slice_tail(n = 1) %>%
  ungroup() %>%
  dplyr::select(country, year, rebelcasualties)
  

## This produces a country-year (partial) panel of data.
x <- mydata %>%
  group_by(country, year) %>%
  # The running count activecum reaches its full-year value on the last row of
  # each country-year, so that is the row to keep.
  slice_tail(n = 1) %>%
  ungroup() %>%
  # A country-year is active when it contains at least one active event.
  mutate(active = ifelse(activecum > 0, 1, 0)) %>%
  dplyr::select(country, year, active)

# Now all of the country-year tables are merged together, one after the other,
# each joined on the columns it shares with the running result.
y <- Reduce(left_join, list(x, govcas, rebcas, civdeaths, govat, rebat))

# Harmonise the GED country names with the V-Dem panel that serves as the
# merge spine below (the join is on country name and year).
y <- y %>%
  mutate(country = gsub("Macedonia, FYR", "North Macedonia", country),
         country = gsub("Ivory Coast", "Cote d'Ivoire", country),
         country = gsub("Bosnia-Herzegovina", "Bosnia and Herzegovina", country),
         country = gsub("Cambodia \\(Kampuchea\\)", "Cambodia", country),
         # The V-Dem cleaning step spells this "Democratic Republic of Congo"
         # (no "the"). With any other spelling the DRC rows fail to merge and
         # its casualties/atrocities are silently zero-filled.
         country = gsub("DR Congo \\(Zaire\\)", "Democratic Republic of Congo", country),
         # NOTE(review): GED appears to label the Republic of Congo simply as
         # "Congo" -- confirm against the raw file. The pattern is anchored, so
         # it cannot touch any longer name and is a no-op if the label is absent.
         country = gsub("^Congo$", "Republic of Congo", country),
         country = gsub("Kingdom of eSwatini \\(Swaziland\\)", "Eswatini", country),
         # closing parenthesis escaped like every other pattern in this list
         country = gsub("Madagascar \\(Malagasy\\)", "Madagascar", country),
         country = gsub("Myanmar \\(Burma\\)", "Myanmar", country),
         country = gsub("Russia \\(Soviet Union\\)", "Russia", country),
         country = gsub("Serbia \\(Yugoslavia\\)", "Serbia", country),
         country = gsub("Yemen \\(North Yemen\\)", "Yemen", country),
         country = gsub("Zimbabwe \\(Rhodesia\\)", "Zimbabwe", country))

# The V-Dem panel (1989 onwards) is used as the spine: joining the conflict
# data onto it keeps only the country-years we need.
z <- vdem %>%
  filter(year >= 1989) %>%
  dplyr::select(country, year, ccode)


paneldata <- left_join(z, y) %>%
  # Country-years with no match in the conflict data had no recorded deaths,
  # so their missing values are turned into zeros.
  mutate(active = replace(active, is.na(active), 0),
         govcasualties = replace(govcasualties, is.na(govcasualties), 0),
         rebelcasualties = replace(rebelcasualties, is.na(rebelcasualties), 0),
         civdeaths = replace(civdeaths_cumulative, is.na(civdeaths_cumulative), 0),
         govat = replace(cumsumgat, is.na(cumsumgat), 0),
         rebat = replace(cumsumrebat, is.na(cumsumrebat), 0)) %>%
  # The zero-filled copies replace the two atrocity sums.
  dplyr::select(-c(cumsumgat, cumsumrebat))


paneldata <- paneldata %>%
  mutate(
    # whether the government was involved in atrocities in this country-year
    govactor = ifelse(govat > 0, 1, 0),
    # atrocities committed in a given year: government plus rebel atrocities
    atr_annual = govat + rebat,
    # blunt marker for country-years before 2002
    before2002 = ifelse(year < 2002, 1, 0)
  ) %>%
  # The running totals below are only meaningful if each country's rows are in
  # chronological order, so sort explicitly instead of relying on the order
  # in which the input file happens to arrive.
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    # cumulative civilian deaths
    civdeaths_cumulative = cumsum(civdeaths),
    # cumulative atrocities
    atrocities = cumsum(atr_annual),
    # cumulative atrocities committed by the government
    govatrocities = cumsum(govat),
    # conflict variable -- 1 if UCDP listed an active conflict in the country
    # in that year, otherwise 0
    conflict = ifelse(active == 1, 1, 0),
    conflict = ifelse(is.na(conflict), 0, conflict),
    # position of the year within the country's series
    index = row_number(),
    # running count of the years with active conflict in a country
    conflictyrs = cumsum(conflict),
    # running totals of government casualties and of rebel casualties
    govc = cumsum(govcasualties),
    rebc = cumsum(rebelcasualties)
  )

# Running totals that start counting in 2002.
post2002 <- paneldata %>%
  filter(year >= 2002) %>%
  select(country, year, civdeaths, atr_annual) %>%
  group_by(country) %>%
  mutate(
    # cumulative civilian deaths post 2002
    civdeaths_post2002 = cumsum(civdeaths),
    # cumulative overall atrocities post 2002
    atr_post2002 = cumsum(atr_annual)
  ) %>%
  # only the keys and the two new totals are carried into the merge
  select(country, year, civdeaths_post2002, atr_post2002)


# Attach the post-2002 totals to the general panel and add a casualties
# variable that combines government and rebel casualties.
conflict <- left_join(paneldata, post2002) %>%
  mutate(casualties = govc + rebc)

# Drop the intermediate objects; only `conflict` is needed from here on.
rm(paneldata, GED, ACD, civdeaths, govat, govcas, mydata, rebat, rebcas, x, y, z, post2002)
```


```{r MERGING EVERYTHING INTO A SINGLE PANEL}
# Starting from the World Bank panel, attach V-Dem, the human rights
# organization data and the conflict panel in turn; each join uses the columns
# the two tables have in common.
df <- Reduce(left_join, list(worldbank, vdem, hro, conflict))

# The ICC data define the rows of the final panel; everything else is merged
# onto them.
df2 <- iccdata %>%
  left_join(df)


# Linearly interpolate the gaps in ONE country's series along calendar year.
# rule = 2 carries the first/last observed value outwards to the ends of the
# series; na.rm = FALSE keeps the result the same length as the input.
interpolate_series <- function(values, years) {
  # A line needs two observed points. With fewer there is nothing to
  # interpolate, so the series is returned as it is.
  if (sum(!is.na(values)) < 2) {
    return(as.numeric(values))
  }
  na.approx(values, x = years, rule = 2, na.rm = FALSE)
}

# This code interpolates the human rights organization and World Bank data,
# logs the GDP data, and finally lags all of the relevant data.
#
# The interpolation is done within country, along year. Interpolating the
# whole stacked column at once (indexed by row position) would draw lines
# across country boundaries, filling one country's leading/trailing gaps with
# values that drift towards the neighbouring country's figures.
# NOTE(review): this changes the interpolated values relative to the earlier
# whole-column version, so downstream numbers will differ from it.
df3 <- df2 %>%
  dplyr::select(-before2002) %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(hrfilled = interpolate_series(hrfilled, year),
         gdppc = interpolate_series(gdppc, year),
         odagni = interpolate_series(odagni, year),
         loggdppc = log(gdppc),
         # +1 so that country-years with zero counts stay defined after logging
         log_casualties = log(casualties + 1),
         log_atrpost2002 = log(atr_post2002 + 1)) %>%
  # One-row lags within country; rows are already in year order.
  mutate(lag_loggdppc = dplyr::lag(loggdppc, n = 1, default = NA),
         threatilag = dplyr::lag(threati, n = 1, default = NA),
         interventionlag = dplyr::lag(intervention, n = 1, default = NA),
         lag_casualties = dplyr::lag(log_casualties, n = 1, default = NA),
         lag_atrpost2002 = dplyr::lag(log_atrpost2002, n = 1, default = NA),
         lag_partipdem = dplyr::lag(v2x_partipdem, n = 1, default = NA),
         lag_acjst = dplyr::lag(v2xcl_acjst, n = 1, default = NA),
         lag_hrfilled = dplyr::lag(hrfilled, n = 1, default = NA),
         lag_ICCrat = dplyr::lag(ICC_ratification, n = 1, default = NA),
         lag_odagni = dplyr::lag(odagni, n = 1, default = NA)) %>%
  # The lags are built on the full series first, so that 2002 still has a
  # lagged value, and only then is the panel cut to 2002 onwards.
  filter(year >= 2002) %>%
  # creating a time trend (2002 = 1)
  mutate(time = (year - 2002) + 1)

# Flag the countries that appear in the PA-X data (1) versus all others (0),
# then keep the columns that make up the panel file. A missing country name
# yields a missing flag.
df3 <- df3 %>%
  mutate(agmt = ifelse(
    is.na(country),
    NA,
    ifelse(country %in% c("Afghanistan", "Algeria", "Angola", "Burundi",
                          "Central African Republic", "Chad", "Colombia",
                          "Comoros", "Cote d'Ivoire",
                          "Democratic Republic of Congo", "Ethiopia",
                          "Georgia", "Guinea", "India", "Indonesia", "Iraq",
                          "Israel", "Kosovo", "Lesotho", "Liberia", "Libya",
                          "Mali", "Moldova", "Morocco", "Mozambique",
                          "Myanmar", "Nepal", "Nigeria", "Pakistan",
                          "Palestine", "Philippines", "Republic of Congo",
                          "Senegal", "Somalia", "South Sudan", "Spain",
                          "Sri Lanka", "Sudan", "Syria", "Uganda", "Ukraine",
                          "United Kingdom", "Yemen"),
           1,
           0)
  )) %>%
  select(country, year, region,
         ICC_ratification, threati, intervention, unscref, art14ref, propmo,
         stage, extent, threatm,
         lag_loggdppc, lag_odagni, ccode, v2x_partipdem, v2xcl_acjst, hrfilled,
         lag_casualties, log_atrpost2002, threatilag, interventionlag,
         lag_hrfilled, lag_ICCrat, lag_atrpost2002, lag_partipdem, lag_acjst,
         agmt, time)

# Save the whole panel as both csv and RData files. The panel data will be
# important for making Figure 1 in the main text of the paper.
write.csv(df3, file="Data/Analysis/PanelData.csv")

save(df3, file = "Data/Analysis/PanelData.RData")

# Drop the inputs that are no longer needed. iccdata_raw is not listed because
# it was already removed right after the ICC data were read; naming it again
# only triggers an "object not found" warning.
remove(conflict,df,df2,hro,iccdata,vdem,vdem_raw,worldbank)
```



```{r READING IN PAX DATA, echo=FALSE}
# The analysis data are built by merging everything with the PA-X agreement
# data, which are read here.
d <- read_csv("Data/Raw/pax_all_agreements_data.csv")

data <- d %>%
  # Only domestic conflict: agreement types "Intra" and "IntraLocal".
  filter(Agtp %in% c("Intra", "IntraLocal")) %>%
  # Identifiers, date, location, the transitional-justice items and the two
  # implementation items used below.
  dplyr::select(Con, Contp, PP, Agt, Reg, AgtId, Dat, Lgt, Agtp, Loc1ISO, UcdpCon,
                TjAm, TjAmBan, TjCou, TjJaNc, TjJaIc, TjMech, TjVet, TjVic, TjRep,
                ImUN, ImOth)
```

```{r PAX DATA CLEAN, echo=FALSE}
# Turn the agreement dates into years, add country names from the ISO codes,
# and build the transitional-justice (TJ) indicators.
# NOTE(review): some indicators switch on at "> 1" and others at "> 0";
# presumably this reflects different coding scales of the underlying PA-X
# items -- confirm against the PA-X codebook.
data <- data %>%
  mutate(Dat = as.Date(Dat),
         year = as.numeric(format(Dat, '%Y')),
         country = countrycode(Loc1ISO, "iso3c", "country.name"),
         # binary variable for an amnesty commitment
         amnesty = ifelse(TjAm > 1, 1, 0),
         # binary variable for if the agreement included a trial commitment
         natcourts = ifelse(TjJaNc > 0, 1, 0),
         # trial commitment, or an amnesty commitment that prohibits
         # amnestying international crimes
         natcourts2 = ifelse(TjJaNc > 0 | TjAmBan > 0, 1, 0),
         # strong trial commitment
         natcourts3 = ifelse(TjCou == 3, 1, 0),
         # international trial commitment
         intcourts = ifelse(TjJaIc > 0, 1, 0),
         # TJ commitment in the "other" category
         othertj = ifelse(TjMech > 1, 1, 0),
         # vetting commitment
         vetting = ifelse(TjVet > 1, 1, 0),
         # reparations commitment
         reparations = ifelse(TjRep > 1, 1, 0),
         # compensation commitment
         victimcomp = ifelse(TjVic > 1, 1, 0),
         # TJ commitment of any kind
         tj = ifelse(amnesty == 1 | natcourts == 1 | natcourts2 == 1 | intcourts == 1 |
                       othertj == 1 | vetting == 1 | reparations == 1 | victimcomp == 1,
                     1, 0),
         # sum of the UN and the other-IO implementation items
         ic_UNOth = ImUN + ImOth) %>%
  # updating the country names to make names consistent across datasets
  mutate(country = gsub("Côte d’Ivoire", "Cote d'Ivoire", country),
         country = gsub("Congo - Kinshasa", "Democratic Republic of Congo", country),
         country = gsub("Yugoslavia", "Serbia", country),
         country = gsub("Congo - Brazzaville", "Republic of Congo", country),
         country = gsub("Myanmar \\(Burma\\)", "Myanmar", country),
         # These cases are set by hand, keyed on the peace-process or
         # agreement id.
         country = ifelse(PP == 33, "Palestine", country),
         country = ifelse(AgtId == 2030, "Kosovo", country),
         country = ifelse(AgtId == 2032, "Kosovo", country),
         country = ifelse(AgtId == 725, "Georgia", country),
         country = ifelse(AgtId == 2231, "Ethiopia", country),
         Loc1ISO = ifelse(PP == 33, "PSE", Loc1ISO),
         Loc1ISO = ifelse(AgtId == 725, "GEO", Loc1ISO),
         Loc1ISO = ifelse(AgtId == 2231, "ETH", Loc1ISO),
         # Agreements located in South Sudan and dated 2011 or earlier are
         # assigned to Sudan. mutate() evaluates its arguments in order, so the
         # test is stored BEFORE the country name is overwritten; testing
         # `country == "South Sudan"` again afterwards could never be TRUE and
         # the ISO code would be left unchanged. The explicit NA checks also
         # keep rows with a missing country from having their ISO code blanked.
         sudan_pre_split = !is.na(country) & country == "South Sudan" &
           !is.na(year) & year <= 2011,
         country = ifelse(sudan_pre_split, "Sudan", country),
         Loc1ISO = ifelse(sudan_pre_split, "SDN", Loc1ISO)) %>%
  # the helper flag is not part of the data
  dplyr::select(-sudan_pre_split)

# Adding COW code and regions from the (corrected) ISO codes
data <- data %>%
  mutate(ccode = countrycode(Loc1ISO, "iso3c", "cown"),
         region = countrycode(Loc1ISO, "iso3c", "region"))
```


```{r MERGING WITH AGREEMENT DATA, echo=FALSE}
# Merge the PA-X data with the panel data: keep the agreement-level variables,
# attach the country-year panel on the shared columns, and add the Africa dummy.
data <- data %>%
  dplyr::select(country, year, UcdpCon, AgtId,
                natcourts, natcourts2, natcourts3, tj, ic_UNOth) %>%
  left_join(df3) %>%
  mutate(Africa = as.numeric(region == "Africa"))

# For completeness' sake, the country name associated with this agreement is
# filled in. It is not included in the analysis because there isn't relevant
# control data (so it gets filtered out), but this keeps things consistent.
data$country <- replace(data$country, data$AgtId == 1894, "Kurdistan")

save(data, file = "Data/Analysis/AnalysisData.RData")
write.csv(data, file="Data/Analysis/AnalysisData.csv")

rm(d, df3)

```
                       